# Web scraping and sentiment analysis of a newspaper's daily archive pages.
# To analyse a different newspaper, change the archive URL in scrape_date().
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import datetime
import matplotlib.pyplot as plt
# Set matplotlib's global default font size. This value is replaced by the
# font size configured just before the histogram is drawn further down.
plt.rcParams.update({'font.size': 10})
# Function to scrape a specific date
def scrape_date(date, timeout=30):
    """Scrape one day's archive page and score the sentiment of its text.

    Downloads the archive page for ``date``, extracts the text of every
    ``<div>`` element and computes its TextBlob polarity. Each text with a
    non-zero polarity is printed together with its score.

    Args:
        date: Object providing ``strftime`` (e.g. ``datetime.datetime``)
            that identifies the archive day to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list: The non-zero polarity scores found on the page. The list is
        empty when the page could not be downloaded.
    """
    # To scrape another newspaper, swap the base URL, e.g.
    # https://www.daily-sun.com/archive/<YYYY-MM-DD>
    url = f"https://www.dhakatribune.com/archive/{date.strftime('%Y-%m-%d')}"

    try:
        # Without a timeout a stalled connection would block the run forever.
        r = requests.get(url, timeout=timeout)
        # Do not analyse the body of an HTTP error page as if it were news.
        r.raise_for_status()
    except requests.RequestException as exc:
        # Skip this day instead of aborting a scrape that may span many days.
        print(f"Skipping {url}: {exc}")
        return []

    # Parse the HTML content
    soup = BeautifulSoup(r.text, 'html.parser')

    # NOTE: find_all() also returns nested <div>s and get_text() includes the
    # text of all descendants, so text inside nested containers is scored
    # more than once. Replace 'div' with a more specific tag for the site.
    text_elements = soup.find_all('div')

    # Analyze the sentiment of each element
    sentiments = []
    for element in text_elements:
        text = element.get_text()
        sentiment = TextBlob(text).sentiment.polarity
        # Elements whose polarity is exactly 0.0 are left out of the result.
        if sentiment != 0.0:
            sentiments.append(sentiment)
            print(f"Text: {text}")
            print(f"Sentiment: {sentiment}")
    return sentiments




# Ask for the start and end dates of the range to scrape.
# Surrounding whitespace is stripped so that a typed or pasted value with a
# stray space does not make strptime() reject an otherwise valid date.
start_date_input = input("Please enter the start date to scrape (in YYYY-MM-DD format): ").strip()
end_date_input = input("Please enter the end date to scrape (in YYYY-MM-DD format): ").strip()
start_date = datetime.datetime.strptime(start_date_input, '%Y-%m-%d')
end_date = datetime.datetime.strptime(end_date_input, '%Y-%m-%d')
# Fail early on a reversed range: otherwise nothing would be scraped and an
# empty spreadsheet and histogram would still be produced.
if end_date < start_date:
    raise ValueError(
        f"End date {end_date_input} is earlier than start date {start_date_input}"
    )




# Walk through the requested range one day at a time (both ends included)
# and pool the scores of every day into a single list.
all_sentiments = []
# Number of days to visit; zero or negative when the range is reversed,
# in which case the loop body never runs.
total_days = (end_date - start_date).days + 1
current_date = start_date
for _ in range(total_days):
    print(f"Scraping date: {current_date.strftime('%Y-%m-%d')}")
    sentiments = scrape_date(current_date)
    all_sentiments += sentiments
    current_date = current_date + datetime.timedelta(days=1)


import os

from google.colab import drive
import pandas as pd

# Mount Google Drive at /content/drive so the output folder is reachable.
drive.mount('/content/drive')

# Single place that defines the output folder; the export uses it.
path='/content/drive/MyDrive/Colab Notebooks/Text Mining by RM HABIB/'

# Create the folder when it is missing so the export does not fail after
# the whole scrape has already finished.
os.makedirs(path, exist_ok=True)

# One row per sentiment score, written without the index column.
pd.DataFrame(all_sentiments).to_excel(os.path.join(path, "Sentiment.xlsx"),index=False)




plt.rcParams.update({'font.size': 8})

# Name shown in the plot title; keep it in sync with the archive URL that
# scrape_date() requests.
newspaper_name = 'Dhaka Tribune'
# Build the date span from the range that was actually entered, so the title
# always matches the data being plotted.
date_span = f"{start_date.strftime('%d %B %Y')} to {end_date.strftime('%d %B %Y')}"

# Plot the sentiments as a histogram over the fixed interval [-1, 1].
plt.figure(figsize=(5,4))
plt.hist(all_sentiments, bins='sturges',color='magenta', range=(-1, 1),edgecolor="black")
plt.title(f'{newspaper_name} ({date_span})',fontsize=8)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
plt.xlabel('Sentiment Score',fontsize=8)
plt.ylabel('Counts',fontsize=8)
# Uncomment to also save the figure to Google Drive:
#plt.savefig("/content/drive/MyDrive/Colab Notebooks/Text Mining by RM HABIB/1_Histogram for Sentiment Score_Daily-Sun_1.jpg", dpi=1500)
plt.show()